Project description

Effect of cohesin looping on genome architecture, from the perspective of the nuclear lamina.

Introduction

In a previous document, I classified LAD borders as 1) CTCF-bound or not 2) shared or unique. Here, I will visualize what the effect is of CTCF depletion on the various types of LAD borders. Also, the effect of the other depletions.

This is a variation on another document. Instead of selecting borders that do not overlap with active genes, here I specifically look at borders that do overlap.

Method

Load (z-scale) DamID tracks and plot effect on different types of LAD borders.

Set-up

Load the libraries and set the parameters.

# Load dependencies
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(GenomicRanges))
suppressPackageStartupMessages(library(rtracklayer))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(caTools))
suppressPackageStartupMessages(library(yaml))
suppressPackageStartupMessages(library(M3C))
suppressPackageStartupMessages(library(ggbeeswarm))
suppressPackageStartupMessages(library(broom))
suppressPackageStartupMessages(library(corrr))

bin.size <- "10kb"
damid.dir <- file.path("../results_NQ/normalized/", paste0("bin-", bin.size))

# Prepare output 
output.dir <- "ts220113_effect_of_CTCF_depletion_on_LAD_borders_with_active_genes"
dir.create(output.dir, showWarnings = FALSE)


# Prepare input
input.dir <- "ts220113_CTCF_enrichment_at_LAD_borders"
metadata.borders <- readRDS(file.path(input.dir, "metadata.rds"))
LADs <- readRDS(file.path(input.dir, "LADs.rds")) 
LAD.borders <- readRDS(file.path(input.dir, "LAD_borders.rds"))
CTCF.sites <- readRDS(file.path(input.dir, "CTCF_sites.rds"))

LADs <- readRDS(file.path(input.dir, "LADs_pA.rds"))
LAD.borders <- readRDS(file.path(input.dir, "LAD_borders_pA.rds"))

borders <- LAD.borders[["mESC_pA_PT"]]
borders$class <- "xxx"

Prepare knitr output.

library(knitr)
opts_chunk$set(fig.width = 10, fig.height = 4, cache = T,
               message = F, warning = F,
               dev=c('png', 'pdf'), fig.path = file.path(output.dir, "figures/")) 
pdf.options(useDingbats = FALSE)

List functions.

LoadDamID <- function(metadata, damid.dir, column = "file") {
  
  # Load data
  tib <- purrr::reduce(lapply(1:nrow(metadata),
                              function(i) {
                                f <- metadata[i, ] %>% pull(column)
                                s <- as.character(metadata$sample)[i]
                                tmp <- read_tsv(file.path(damid.dir, f), 
                                                col_names = c("seqnames", "start", "end", s))
                              }),
                       full_join)
  
  # Convert to GRanges
  tib$start <- tib$start + 1
  gr <- as(tib, "GRanges")
  
  # Filter chromosomes
  gr <- gr[seqnames(gr) %in% c(paste0("chr", 1:22), "chrX")]
  gr
  
}

LoadDamIDCounts <- function(metadata, damid.dir.counts, 
                            yaml = "../bin/snakemake/config.yaml") {
  
  # List samples from yaml file
  yaml_list <- read_yaml(yaml)
  
  # Read lmnb1 counts
  idx <- which(names(yaml_list$dam_controls) %in% unlist(yaml_list$replicates))
  
  lmnb1_files <- names(yaml_list$dam_controls)[idx]
  dam_files <- unlist(yaml_list$dam_controls)[idx]
  
  # Prepare counts metadata
  metadata_counts <- tibble(lmnb1 = lmnb1_files,
                            dam = dam_files) %>%
    mutate(sample = str_remove(lmnb1, "pADamID_NQ_"),
           sample = str_remove(sample, "pADamID_"),
           sample = str_remove(sample, "-")) %>%
    separate(sample, c("condition", "timepoint", "replicate"), remove = F) %>%
    mutate(idx = match(paste0(condition, timepoint),
                       paste0(metadata$condition, metadata$timepoint)),
           group = metadata$sample[idx],
           lmnb1 = paste0(lmnb1, "-10kb.counts.txt.gz"),
           dam = paste0(dam, "-10kb.counts.txt.gz")) %>%
    arrange(idx, replicate)
  
  # Read lmnb1 files
  gr_lmnb1 <- LoadDamID(metadata_counts, damid.dir.counts, column = "lmnb1")
  
  # CPM and mean of replicates
  tib <- as_tibble(mcols(gr_lmnb1)) %>%
    mutate_all(function(x) x / sum(x) * 1e6) %>%
    add_column(bin = 1:nrow(.)) %>%
    gather(key, value, -bin) %>%
    mutate(sample = metadata_counts$group[match(key, metadata_counts$sample)]) %>%
    group_by(bin, sample) %>%
    summarise(mean = mean(value)) %>%
    ungroup() %>%
    spread(sample, mean) %>%
    dplyr::select(-bin)
  
  mcols(gr_lmnb1) <- tib
  
  # Read dam files
  gr_dam <- LoadDamID(metadata_counts, damid.dir.counts, column = "dam")
  
  # CPM and mean of replicates
  tib <- as_tibble(mcols(gr_dam)) %>%
    mutate_all(function(x) x / sum(x) * 1e6) %>%
    add_column(bin = 1:nrow(.)) %>%
    gather(key, value, -bin) %>%
    mutate(sample = metadata_counts$group[match(key, metadata_counts$sample)]) %>%
    group_by(bin, sample) %>%
    summarise(mean = mean(value)) %>%
    ungroup() %>%
    spread(sample, mean) %>%
    dplyr::select(-bin)
  
  mcols(gr_dam) <- tib
  
  list(gr_lmnb1, gr_dam)
  
}

PlotDensity <- function(damid, title, xlimits = NULL, replicate = F, conditions = NULL) {
  
  if (replicate) {
    tib <- as_tibble(mcols(damid)) %>%
      gather() %>%
      separate(key, c("condition", "timepoint", "replicate"), remove = F, sep = "_") %>%
      mutate(condition = factor(condition, levels = levels(metadata.damid$condition)),
             timepoint = factor(timepoint, levels = levels(metadata.damid$timepoint)),
             replicate = factor(replicate, levels = paste0("r", 1:10))) %>%
      drop_na()
  } else {
    tib <- as_tibble(mcols(damid)) %>%
      gather() %>%
      separate(key, c("condition", "timepoint"), remove = F, sep = "_") %>%
      mutate(condition = factor(condition, levels = levels(metadata.damid$condition)),
             timepoint = factor(timepoint, levels = levels(metadata.damid$timepoint))) %>%
      drop_na()
  }
  
  nrow <- 2
  if (! is.null(conditions)) {
    tib <- tib %>%
      filter(condition %in% conditions)
    nrow <- 1
  }
  
  plt <- tib %>%
    ggplot(aes(x = value, col = timepoint)) +
    #geom_density() +
    facet_wrap( ~ condition, nrow = nrow) +
    ggtitle(title) +
    xlab("DamID score") +
    ylab("Density") +
    scale_color_brewer(palette = "Dark2") +
    theme_bw() +
    theme(aspect.ratio = 1)
  
  if (replicate) {
    plt <- plt + geom_density(aes(linetype = replicate))
  } else {
    plt <- plt + geom_density()
  }
  
  if (! is.null(xlimits)) {
    plt <- plt +
      coord_cartesian(xlim = xlimits)
  }
  
  plt
  
}

ScaleDamID <- function(damid) {
  tib <- as_tibble(mcols(damid))
  tib.scaled <- as_tibble(scale(tib))
  mcols(damid) <- tib.scaled
  damid
}

grMid <- function(gr) {
  start(gr) <- end(gr) <- rowMeans(cbind(start(gr), end(gr)))
  gr
}

DistanceToLADBorder <- function(sites, borders, nearest = T, border.class = F) {
  # Given a GRanges of damid and LAD borders, calculate the 
  # distance to the preceding and following LAD. Return a new GRanges
  
  # Make sure the chromosomes are as they should be - especially for the bins
  sites <- sites[seqnames(sites) %in% c(paste0("chr", 1:22),
                                        "chrX")]
  borders <- borders[seqnames(borders) %in% c(paste0("chr", 1:22),
                                              "chrX")]
  
  # Preceding distance
  sites$dis.precede <- sites$strand.precede <- 
    sites$border.class.precede <- sites$border.ctcf.precede <- NA
  idx.precede <- precede(sites, borders, ignore.strand = T, select = "all")
  sites$dis.precede[queryHits(idx.precede)] <- distance(sites[queryHits(idx.precede)], 
                                                        borders[subjectHits(idx.precede)], 
                                                        ignore.strand = T)
  sites$strand.precede[queryHits(idx.precede)] <- strand(borders[subjectHits(idx.precede)])
  
  sites$border.class.precede[queryHits(idx.precede)] <- borders$class[subjectHits(idx.precede)]
  sites$border.ctcf.precede[queryHits(idx.precede)] <- borders$CTCF[subjectHits(idx.precede)]
  
  
  # Following distance
  sites$dis.follow <- sites$strand.follow <- 
    sites$border.class.follow <- sites$border.ctcf.follow <- NA
  idx.follow <- follow(sites, borders, ignore.strand = T, select = "all")
  sites$dis.follow[queryHits(idx.follow)] <- distance(sites[queryHits(idx.follow)], 
                                                      borders[subjectHits(idx.follow)], 
                                                      ignore.strand = T)
  sites$strand.follow[queryHits(idx.follow)] <- strand(borders[subjectHits(idx.follow)])
  
  sites$border.class.follow[queryHits(idx.follow)] <- borders$class[subjectHits(idx.follow)]
  sites$border.ctcf.follow[queryHits(idx.follow)] <- borders$CTCF[subjectHits(idx.follow)]
  
  
  # Exception: overlapping (follow = distance (0), precede = NA)
  idx.overlap <- findOverlaps(sites, borders, ignore.strand = T)
  sites$dis.follow[queryHits(idx.overlap)] <- distance(sites[queryHits(idx.overlap)], 
                                                       borders[subjectHits(idx.overlap)], 
                                                       ignore.strand = T)
  sites$dis.precede[queryHits(idx.overlap)] <- NA
  sites$strand.follow[queryHits(idx.overlap)] <- sites$strand.precede[queryHits(idx.overlap)] <- 
    strand(borders[subjectHits(idx.overlap)])
  
  sites$border.class.precede[queryHits(idx.overlap)] <- 
    sites$border.class.follow[queryHits(idx.overlap)] <-
    borders$class[subjectHits(idx.overlap)]
  sites$border.ctcf.precede[queryHits(idx.overlap)] <- 
    sites$border.ctcf.follow[queryHits(idx.overlap)] <-
    borders$CTCF[subjectHits(idx.overlap)]
  
  
  # Alternative: only use information from the nearest hit
  if (nearest) {
    # Remove precede information if follow is smaller
    idx.remove.precede <- which(sites$dis.follow < sites$dis.precede)
    sites$dis.precede[idx.remove.precede] <- NA
    # Remove follow information if precede is smaller
    idx.remove.follow <- which(sites$dis.follow > sites$dis.precede)
    sites$dis.follow[idx.remove.follow] <- NA
  }
  
  sites
  
}

CountPerBins <- function(sites, bin.size = 10000) {
  
  tib <- as_tibble(mcols(sites)) %>%
    add_column(number = 1:nrow(.)) %>%
    mutate(dis.precede.group = as.numeric(cut(dis.precede, 
                                              breaks = seq(0, max(dis.precede, na.rm = T) + 1, 
                                                           by = bin.size))) - 1,
           dis.follow.group = as.numeric(cut(dis.follow, 
                                             breaks = seq(0, max(dis.follow, na.rm = T) + 1, 
                                                          by = bin.size))) - 1) %>%
    dplyr::select(-dis.precede, -dis.follow) %>%
    mutate(dis.precede.group = ifelse(strand.precede == "+", 
                                      -dis.precede.group, dis.precede.group),
           dis.follow.group = ifelse(strand.follow == "+", 
                                     dis.follow.group, -dis.follow.group)) %>%
    dplyr::select(-strand.precede, -strand.follow) %>%
    gather(key, value, dis.precede.group, dis.follow.group) %>%
    drop_na() %>%
    mutate(border.class = ifelse(key == "dis.precede.group", 
                                 border.class.precede, border.class.follow),
           border.ctcf = ifelse(key == "dis.precede.group", 
                                border.ctcf.precede, border.ctcf.follow)) %>%
    dplyr::select(-border.class.precede, -border.class.follow, 
                  -border.ctcf.precede, -border.ctcf.follow) %>%
    gather(sample, score, -number, -key, -value, -border.class, -border.ctcf) %>%
    group_by(value, border.class, border.ctcf, sample) %>%
    summarise(count = n(),
              mean = mean(score)) %>%
    ungroup() %>%
    mutate(sample = factor(sample, levels = levels(metadata.damid$sample)),
           border.ctcf = factor(border.ctcf, levels = c("nonCTCF", "CTCF")),
           border.class = factor(border.class, levels = c("shared", "unique"))) %>%
    arrange(sample, border.class, border.ctcf)
  
  tib
  
}

PlotDamIDScoresAndDifferences <- function(damid.summary, 
                                          xlimits = c(-0.4, 0.4),
                                          ylimits.diff = c(-0.3, 0.2),
                                          extra_grouping = NULL) {
  tib <- damid.summary %>%
    mutate(mean = runmean(mean, k = 5)) %>%
    group_by_at(c("sample", "value", extra_grouping)) %>%
    summarise(combined.count = sum(count),
              combined.mean = weighted.mean(mean, count)) %>%
    filter(combined.count > 20) %>%
    separate(sample, c("condition", "timepoint"), remove = F) %>%
    mutate(condition = factor(condition, levels = levels(metadata.damid$condition)),
           timepoint = factor(timepoint, levels = levels(metadata.damid$timepoint))) %>%
    ungroup()
  
  plt <- tib %>%
    ggplot(aes(x = value / 100, y = combined.mean, col = border.ctcf)) +
    annotate("rect", xmin = 0, xmax = 1e3, ymin = -1, ymax = 5, 
             fill = "grey", alpha = 0.3) +  
    geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
    geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
    geom_line(size = 1) +
    facet_grid(as.formula(paste(paste(c("condition"), 
                                      collapse = " + "), 
                                "~", "timepoint"))) +
    ggtitle("DamID scores at LAD borders") +
    xlab("Distance from LAD border (Mb)") +
    ylab("DamID (z-score)") +
    coord_cartesian(xlim = xlimits, ylim = c(-1, 1)) +
    scale_color_manual(values = c("blue", "red")) +
    theme_bw() +
    theme(aspect.ratio = 1,
          axis.text.x = element_text(angle = 90, hjust = 1))
  plot(plt)
  
  # Also, plot the difference between 0h and the others
  tib.difference <- tib %>%
    dplyr::select(-sample, -combined.count) %>%
    spread(key = timepoint, value = combined.mean) %>%
    mutate(diff.6h = `6h` - `0h`,
           diff.24h = `24h` - `0h`,
           diff.96h = `96h` - `0h`) %>%
    dplyr::select(-`0h`, -`6h`, -`24h`, -`96h`) %>%
    gather(timepoint, combined.difference, diff.6h, diff.24h, diff.96h) %>%
    mutate(timepoint = factor(timepoint, levels = c("diff.6h", "diff.24h", "diff.96h")))
  
  plt <- tib.difference %>%
    ggplot(aes(x = value / 100, y = combined.difference, col = border.ctcf)) +
    annotate("rect", xmin = 0, xmax = 1e3, ymin = -1, ymax = 5, 
             fill = "grey", alpha = 0.3) +  
    geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
    geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
    geom_line(size = 1) +
    facet_grid(as.formula(paste(paste(c("condition"), 
                                      collapse = " + "), 
                                "~", "timepoint"))) +
    ggtitle("DamID difference at LAD borders") +
    xlab("Distance from LAD border (Mb)") +
    ylab("DamID difference (z-score)") +
    coord_cartesian(xlim = xlimits, ylim = ylimits.diff) +
    scale_color_manual(values = c("blue", "red")) +
    theme_bw() +
    theme(aspect.ratio = 1,
          axis.text.x = element_text(angle = 90, hjust = 1))
  plot(plt)
  
  # Also, plot the difference between CTCF and non-CTCF borders
  tib.difference <- tib %>%
    dplyr::select(-sample, -combined.count) %>%
    spread(key = border.ctcf, value = combined.mean) %>%
    mutate(diff = CTCF - nonCTCF) %>%
    dplyr::select(-nonCTCF, -CTCF)
  
  plt <- tib.difference %>%
    ggplot(aes(x = value / 100, y = diff, col = timepoint)) +
    annotate("rect", xmin = 0, xmax = 1e3, ymin = -1, ymax = 5, 
             fill = "grey", alpha = 0.3) +  
    geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
    geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
    geom_line(size = 1) +
    facet_wrap( ~ condition, nrow = 2) +
    ggtitle("DamID difference at CTCF borders") +
    xlab("Distance from LAD border (Mb)") +
    ylab("DamID difference (z-score)") +
    coord_cartesian(xlim = xlimits, ylim = ylimits.diff) +
    scale_color_brewer(palette = "Set1") +
    theme_bw() +
    theme(aspect.ratio = 1,
          axis.text.x = element_text(angle = 90, hjust = 1))
  plot(plt)
  
}

1. Load data

1.1 Previous data

Load the required data. First, the previous data sets that I need.

# See above

1.2 DamID data

Also, load DamID data. I will work with combined data tracks of replicates.

Also, load the counts. These can be used to illustrate whether any effects are caused by a change in accessibility or Lamin B1 reads.

# Prepare DamID metadata
metadata.damid <- tibble(dir(damid.dir, pattern = "combined.*norm.txt.gz"),
                         .name_repair = ~ c("file")) %>%
  mutate(sample = str_remove(str_remove(file, "pADamID_"),
                             paste0("-", bin.size, ".*"))) %>%
  mutate(sample = str_remove(sample, "^NQ_")) %>%
  mutate(sample = str_replace_all(sample, "-", "")) %>%
  separate(sample, c("condition", "timepoint"), remove = F, sep = "_") %>%
  mutate(condition = factor(condition, levels = c("PT", "CTCFEL", "CTCFNQ", 
                                                  "CTCF", "RAD21", "WAPL", "CTCFWAPL")),
         timepoint = factor(timepoint, levels = c("0h", "6h", "24h", "96h"))) %>%
  arrange(condition, timepoint) %>%
  add_column(cell = factor("mESC")) %>%
  mutate(sample = factor(sample, levels = unique(sample))) %>%
  drop_na()

# Load DamID
damid <- LoadDamID(metadata.damid, damid.dir)

Before I can continue with the DamID data, I want to convert the values first to z-scores. Also, make density plots.

# Scale DamID
damid <- ScaleDamID(damid)

2. DamID scores at LAD borders

I need to add the distance to LAD border (and which LAD border) to the DamID data. First, filter LAD borders for positioning near active genes.

Note that most of these analyses are old and removed. Instead, see below for better analyses of LAD borders, where I process all borders individually.

# Only for borders without genes
borders <- borders[borders$ovl_gene == T] 

Having these distances, I can now start plotting the DamID profile around LAD borders. Also, I can plot the difference between the later timepoints and 0h.

3. Individual LAD borders

The previous plots are all based on average effects. Here, I will try to work with individual LAD borders, to see if the real differences are only for a subset of CTCF borders.

First, I will determine the distance to LAD borders for all genomic bins. Use LAD borders defined in the pA-DamID data for the parental clone.

GatherBorders <- function(damid, borders, lads) {
  
  # Get the distances to the nearest LAD borders for all damid bins
  damid.mid <- damid
  start(damid.mid) <- end(damid.mid) <- (start(damid.mid) + end(damid.mid)) / 2
  
  dis <- as_tibble(distanceToNearest(damid.mid, borders))
  
  # Round distances to the nearest 5kb
  dis <- dis %>%
    mutate(distance = ceiling(distance / 5000) * 5000)
  
  # Also, determine which bins overlap with lads
  ovl <- damid.mid %over% lads
  
  # Process data as tibble
  tib <- as_tibble(damid.mid) %>%
    add_column(border_idx = dis$subjectHits,
               border_ctcf = borders$CTCF[dis$subjectHits],
               border_ctcf_n = borders$CTCF.count[dis$subjectHits],
               border_ctcf_strand = borders$CTCF_strand[dis$subjectHits],
               distance = dis$distance,
               within_lad = ovl) %>%
    mutate(distance = case_when(within_lad == 1 ~ distance,
                                T ~ -distance),
           border_ctcf_strand = factor(border_ctcf_strand,
                                       levels = c("outwards", "inwards",
                                                  "ambiguous", "nonCTCF")),
           border_ctcf_n = case_when(border_ctcf_n >= 3 ~ ">=3", 
                                     border_ctcf_n == 2 ~ "2",
                                     border_ctcf_n == 1 ~ "1",
                                     T ~ "nonCTCF"),
           border_ctcf_n = factor(border_ctcf_n,
                                  levels = c("nonCTCF", "1", "2", ">=3"))) %>%
    filter(abs(distance) < 2e5)
  
  # # Only work with "complete" borders (remove small iLADs / LADs)
  # borders_complete <- which(as.numeric(table(tib$border_idx)) > 25)
  # 
  # tib <- tib %>%
  #   filter(border_idx %in% borders_complete)
  
  # Plot all lines as "test"
  tib %>%
    ggplot(aes(x = distance / 1e3, y = PT_0h)) +
    #geom_line(aes(group = border_idx), alpha = 0.1) +
    geom_smooth(aes(group = border_ctcf, col = border_ctcf), se = T) +
    xlab("Distance to LAD border (kb)") +
    ylab("pA-DamID (z-score)") +
    theme_bw() +
    theme(aspect.ratio = 1)
  
  tib
  
}

tib_damid <- GatherBorders(damid, borders = borders, 
                           lads = LADs[["mESC_pA_PT"]])

Next, plot the effect of depletion at LAD borders. I will prepare several figures / results here:

  • Average profile (+ conf. int.) of all borders.
  • Per-border effect outside of LAD borders.
  • Statistics of this.

Not only do this for depletions at +/- CTCF borders, but also based on CTCF strand and CTCF count. Also, for the unnormalized data (as cpm) to illustrate whether the effects are caused by differences in LaminB1 or Dam signal.

sd_fun <- function(x, y = 1.3) {
  return(data.frame(y = y, label = round(sd(x), 2)))
}

PlotBordersWithConfidenceIntervals <- function(tib, samples,
                                               ylim = c(-0.9, 0.65),
                                               smooth = 1,
                                               group = "border_ctcf",
                                               keep_PT = F,
                                               filter_96h = T) {
  
  if (filter_96h) {
    samples <- samples[! grepl("96h", samples)]
  }
  
  # Gather tib
  if (smooth != 1) {
    tib <- tib %>%
      mutate_at(vars(ends_with("h")), runmean, k = smooth, endrule = "mean")
  }
  
  tib_gather <- tib %>%
    gather(key, value, 
           grep("[0-9]+h$", names(tib), value = T)) %>%
    mutate(key = factor(key, levels = levels(metadata.damid$sample)))
  tib_gather$group <- tib_gather %>% pull(group)
  
  if (keep_PT) {
    plt <- tib_gather %>%
      filter(key %in% samples) %>%
      ggplot(aes(x = distance / 1e3, y = value, 
                 group = key, col = key, fill = key)) +
      annotate("rect", xmin = 0, xmax = 300, ymin = -10, ymax = 10, 
               fill = "grey", alpha = 0.3) +  
      geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
      geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
      #geom_line(aes(group = border_idx), alpha = 0.1) +
      #geom_smooth(aes(group = key, col = key), se = T) +
      stat_summary(fun = mean, geom = "line", size = 1) +
      stat_summary(fun.data = mean_se, geom = "ribbon", alpha = 0.25, col = NA,
                   fun.args = list(mult = 1.96)) +
      #facet_grid(. ~ border_ctcf) +
      facet_grid(group ~ .) +
      xlab("Distance to LAD border (kb)") +
      ylab("pA-DamID (z-score)") +
      coord_cartesian(xlim = c(-200, 200), ylim = ylim, expand = F) +
      scale_color_brewer(palette = "Set1", name = "Border class") +
      scale_fill_brewer(palette = "Set1", name = "Border class", guide = "none") +
      theme_bw() +
      theme(aspect.ratio = 1,
            axis.text.x = element_text(angle = 90, hjust = 1))
    plot(plt)
    
    samples_without_pt <- samples
    
    if (length(samples) == 1) return(NULL)
    
  } else {
    
    # Plot some tracks
    plt <- tib_gather %>%
      filter(key %in% samples & key != "PT_0h") %>%
      ggplot(aes(x = distance / 1e3, y = value, 
                 group = key, col = key, fill = key)) +
      annotate("rect", xmin = 0, xmax = 300, ymin = -10, ymax = 10, 
               fill = "grey", alpha = 0.3) +  
      geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
      geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
      #geom_line(aes(group = border_idx), alpha = 0.1) +
      #geom_smooth(aes(group = key, col = key), se = T) +
      stat_summary(fun = mean, geom = "line", size = 1) +
      stat_summary(fun.data = mean_se, geom = "ribbon", alpha = 0.25, col = NA,
                   fun.args = list(mult = 1.96)) +
      #facet_grid(. ~ border_ctcf) +
      facet_grid(group ~ .) +
      xlab("Distance to LAD border (kb)") +
      ylab("pA-DamID (z-score)") +
      coord_cartesian(xlim = c(-200, 200), ylim = ylim, expand = F) +
      scale_color_brewer(palette = "Set1", name = "Border class") +
      scale_fill_brewer(palette = "Set1", name = "Border class", guide = "none") +
      theme_bw() +
      theme(aspect.ratio = 1,
            axis.text.x = element_text(angle = 90, hjust = 1))
    plot(plt)
    
    samples_without_pt <- samples[2:length(samples)]
    
  }
  
  # # Can I also quantify the difference within the LAD?
  # tib_ctcf <- tib %>%
  #   filter(distance > 20e3 & distance < 120e3) %>%
  #   dplyr::select(border_idx, all_of(samples_without_pt), all_of(group)) %>%
  #   dplyr::rename_at(vars(group), ~ "group") %>%
  #   group_by(border_idx, group) %>%
  #   summarise_at(samples_without_pt, mean, na.rm = T) %>%
  #   ungroup() %>%
  #   dplyr::rename_at(samples_without_pt[1], ~c("base")) %>%
  #   mutate_at(samples_without_pt[2:length(samples_without_pt)], 
  #             function(x) x - .$base) %>%
  #   dplyr::select(border_idx, group, 
  #                 samples_without_pt[2:length(samples_without_pt)]) %>%
  #   gather(key, value, -group, -border_idx) %>%
  #   mutate(key = factor(key, samples_without_pt),
  #          group = factor(group, levels = c("nonCTCF",
  #                                           "CTCF",
  #                                           "outwards", "inwards", "ambiguous",
  #                                           "1", "2", ">=3")))
  # 
  # # Plot by time point
  # plt <- tib_ctcf %>%
  #   ggplot(aes(x = group, y = value, col = group)) +
  #   geom_quasirandom() +
  #   geom_boxplot(fill = NA, col = "black", outlier.shape = NA) +
  #   geom_hline(yintercept = 0, col = "darkgrey", linetype = "dashed") +
  #   facet_grid(. ~ key) +
  #   coord_cartesian(ylim = c(-1.5, 1.5)) +
  #   scale_color_brewer(palette = "Set2", guide = "none") +
  #   xlab("") +
  #   ylab("Difference within LAD with t=0h") +
  #   theme_bw() +
  #   theme(aspect.ratio = 1.5,
  #         axis.text.x = element_text(angle = 90, hjust = 1))
  # plot(plt)
  
  
  # Can I also quantify the difference between the local CTCF depletion?
  tib_ctcf <- tib %>%
    filter(distance > -20e3 & distance < 0) %>%
    dplyr::select(border_idx, all_of(samples_without_pt), all_of(group)) %>%
    dplyr::rename_at(vars(group), ~ "group") %>%
    group_by(border_idx, group) %>%
    summarise_at(samples_without_pt, mean, na.rm = T) %>%
    ungroup() %>%
    dplyr::rename_at(samples_without_pt[1], ~c("base")) %>%
    mutate_at(samples_without_pt[2:length(samples_without_pt)], 
              function(x) x - .$base) %>%
    dplyr::select(border_idx, group, 
                  samples_without_pt[2:length(samples_without_pt)]) %>%
    gather(key, value, -group, -border_idx) %>%
    mutate(key = factor(key, samples_without_pt),
           group = factor(group, levels = c("nonCTCF",
                                            "CTCF",
                                            "outwards", "inwards", "ambiguous",
                                            "1", "2", ">=3")))
  
  # Plot by time point
  plt <- tib_ctcf %>%
    ggplot(aes(x = group, y = value, col = group)) +
    geom_quasirandom() +
    geom_boxplot(fill = NA, col = "black", outlier.shape = NA) +
    geom_hline(yintercept = 0, col = "darkgrey", linetype = "dashed") +
    stat_summary(fun.data = sd_fun, geom = "text", col = "black") +
    facet_grid(. ~ key) +
    coord_cartesian(ylim = c(-1.5, 1.5)) +
    scale_color_brewer(palette = "Set2", guide = "none") +
    xlab("") +
    ylab("Difference outside LAD border with t=0h") +
    theme_bw() +
    theme(aspect.ratio = 1.5,
          axis.text.x = element_text(angle = 90, hjust = 1))
  plot(plt)
  
  # # Plot by border class
  # plt <- tib_ctcf %>%
  #   ggplot(aes(x = key, y = value, col = key)) +
  #   geom_quasirandom() +
  #   geom_boxplot(fill = NA, col = "black", outlier.shape = NA) +
  #   geom_hline(yintercept = 0, col = "darkgrey", linetype = "dashed") +
  #   facet_grid(. ~ group) +
  #   scale_color_brewer(palette = "Set1", guide = F) +
  #   xlab("") +
  #   ylab("Difference outside LAD border with t=0h") +
  #   theme_bw() +
  #   theme(aspect.ratio = 1.5,
  #         axis.text.x = element_text(angle = 90, hjust = 1))
  # plot(plt)
  
  # Statistics - difference from diff = 0
  tib_ctcf %>%
    group_by(group, key) %>%
    dplyr::summarise(pvalue = wilcox.test(value)$p.value) %>%
    ungroup() %>%
    mutate(padj = p.adjust(pvalue),
           sign = padj < 0.05) %>%
    print(n = 40)
  
  # Statistics - difference with nonCTCF borders
  tib_stat <- tibble()
  
  for (current_group in levels(tib_ctcf$group)) {
    for (current_key in levels(tib_ctcf$key)) {
      
      if (! current_group %in% tib_ctcf$group) next
      if (current_group %in% "nonCTCF") next
      if (! current_key %in% tib_ctcf$key) next
      
      
      tmp <- wilcox.test(tib_ctcf %>%
                           filter(group == current_group &
                                    key == current_key) %>%
                           pull(value),
                         tib_ctcf %>%
                           filter(group == "nonCTCF" &
                                    key == current_key) %>%
                           pull(value))
      
      tib_stat <- bind_rows(tib_stat,
                            tibble(group = current_group,
                                   key = current_key,
                                   pvalue = tmp$p.value))
    }
  }
  
  tib_stat %>%
    mutate(padj = p.adjust(pvalue),
           sign = padj < 0.05) %>%
    print(n = 40)
  
  tib_stat
  
}

# CTCF vs non-CTCF
PlotBordersWithConfidenceIntervals(tib_damid, 
                                   c("PT_0h", "CTCFEL_0h",
                                     "RAD21_0h", "WAPL_0h",
                                     "CTCFWAPL_0h"),
                                   keep_PT = T)

## # A tibble: 8 × 5
##   group   key           pvalue          padj sign 
##   <fct>   <fct>          <dbl>         <dbl> <lgl>
## 1 nonCTCF CTCFEL_0h   2.87e- 3 0.0172        TRUE 
## 2 nonCTCF RAD21_0h    3.14e- 1 0.628         FALSE
## 3 nonCTCF WAPL_0h     2.05e- 1 0.614         FALSE
## 4 nonCTCF CTCFWAPL_0h 9.39e- 3 0.0376        TRUE 
## 5 CTCF    CTCFEL_0h   3.19e- 1 0.628         FALSE
## 6 CTCF    RAD21_0h    1.51e- 6 0.0000106     TRUE 
## 7 CTCF    WAPL_0h     5.45e-10 0.00000000436 TRUE 
## 8 CTCF    CTCFWAPL_0h 4.45e- 3 0.0223        TRUE 
## # A tibble: 4 × 5
##   group key            pvalue      padj sign 
##   <chr> <chr>           <dbl>     <dbl> <lgl>
## 1 CTCF  CTCFEL_0h   0.00637   0.00756   TRUE 
## 2 CTCF  RAD21_0h    0.00378   0.00756   TRUE 
## 3 CTCF  WAPL_0h     0.0000236 0.0000942 TRUE 
## 4 CTCF  CTCFWAPL_0h 0.000111  0.000334  TRUE
## # A tibble: 4 × 3
##   group key            pvalue
##   <chr> <chr>           <dbl>
## 1 CTCF  CTCFEL_0h   0.00637  
## 2 CTCF  RAD21_0h    0.00378  
## 3 CTCF  WAPL_0h     0.0000236
## 4 CTCF  CTCFWAPL_0h 0.000111
tib_stat <- tibble()

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "PT_24h"),
                                          keep_PT = T, ylim = c(-0.8, 0.8))

## # A tibble: 2 × 5
##   group   key       pvalue     padj sign 
##   <fct>   <fct>      <dbl>    <dbl> <lgl>
## 1 nonCTCF PT_24h 0.154     0.154    FALSE
## 2 CTCF    PT_24h 0.0000526 0.000105 TRUE 
## # A tibble: 1 × 5
##   group key    pvalue   padj sign 
##   <chr> <chr>   <dbl>  <dbl> <lgl>
## 1 CTCF  PT_24h 0.0693 0.0693 FALSE
tib_stat <- bind_rows(tib_stat, tmp)

PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "PT_24h", "CTCFEL_0h", "CTCFEL_6h", 
                                            "CTCFEL_24h", "CTCFEL_96h"), 
                                          keep_PT = T, ylim = c(-0.8, 0.8))

## # A tibble: 8 × 5
##   group   key           pvalue     padj sign 
##   <fct>   <fct>          <dbl>    <dbl> <lgl>
## 1 nonCTCF PT_24h     0.154     0.768    FALSE
## 2 nonCTCF CTCFEL_0h  0.00287   0.0201   TRUE 
## 3 nonCTCF CTCFEL_6h  0.0393    0.236    FALSE
## 4 nonCTCF CTCFEL_24h 0.159     0.768    FALSE
## 5 CTCF    PT_24h     0.0000526 0.000421 TRUE 
## 6 CTCF    CTCFEL_0h  0.319     0.768    FALSE
## 7 CTCF    CTCFEL_6h  0.163     0.768    FALSE
## 8 CTCF    CTCFEL_24h 0.159     0.768    FALSE
## # A tibble: 4 × 5
##   group key         pvalue   padj sign 
##   <chr> <chr>        <dbl>  <dbl> <lgl>
## 1 CTCF  PT_24h     0.0693  0.208  FALSE
## 2 CTCF  CTCFEL_0h  0.00637 0.0255 TRUE 
## 3 CTCF  CTCFEL_6h  0.740   1      FALSE
## 4 CTCF  CTCFEL_24h 0.893   1      FALSE
## # A tibble: 4 × 3
##   group key         pvalue
##   <chr> <chr>        <dbl>
## 1 CTCF  PT_24h     0.0693 
## 2 CTCF  CTCFEL_0h  0.00637
## 3 CTCF  CTCFEL_6h  0.740  
## 4 CTCF  CTCFEL_24h 0.893
tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "CTCFEL_0h", "CTCFEL_6h", 
                                            "CTCFEL_24h", "CTCFEL_96h"), 
                                          ylim = c(-0.8, 0.8))

## # A tibble: 4 × 5
##   group   key          pvalue    padj sign 
##   <fct>   <fct>         <dbl>   <dbl> <lgl>
## 1 nonCTCF CTCFEL_6h  0.0770   0.0770  FALSE
## 2 nonCTCF CTCFEL_24h 0.0266   0.0532  FALSE
## 3 CTCF    CTCFEL_6h  0.00306  0.00918 TRUE 
## 4 CTCF    CTCFEL_24h 0.000732 0.00293 TRUE 
## # A tibble: 2 × 5
##   group key           pvalue     padj sign 
##   <chr> <chr>          <dbl>    <dbl> <lgl>
## 1 CTCF  CTCFEL_6h  0.000538  0.000538 TRUE 
## 2 CTCF  CTCFEL_24h 0.0000642 0.000128 TRUE
tib_stat <- bind_rows(tib_stat, tmp)

PlotBordersWithConfidenceIntervals(tib_damid, 
                                   c("PT_0h", "CTCFNQ_0h", "CTCFNQ_6h", 
                                     "CTCFNQ_24h", "CTCFNQ_96h"))

## # A tibble: 4 × 5
##   group   key             pvalue        padj sign 
##   <fct>   <fct>            <dbl>       <dbl> <lgl>
## 1 nonCTCF CTCFNQ_6h  0.00186     0.00357     TRUE 
## 2 nonCTCF CTCFNQ_24h 0.000000175 0.000000699 TRUE 
## 3 CTCF    CTCFNQ_6h  0.00179     0.00357     TRUE 
## 4 CTCF    CTCFNQ_24h 0.000122    0.000365    TRUE 
## # A tibble: 2 × 5
##   group key        pvalue  padj sign 
##   <chr> <chr>       <dbl> <dbl> <lgl>
## 1 CTCF  CTCFNQ_6h   0.897     1 FALSE
## 2 CTCF  CTCFNQ_24h  0.694     1 FALSE
## # A tibble: 2 × 3
##   group key        pvalue
##   <chr> <chr>       <dbl>
## 1 CTCF  CTCFNQ_6h   0.897
## 2 CTCF  CTCFNQ_24h  0.694
tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "WAPL_0h", "WAPL_6h", 
                                            "WAPL_24h", "WAPL_96h"),
                                          ylim = c(-0.95, 0.7))

## # A tibble: 4 × 5
##   group   key             pvalue          padj sign 
##   <fct>   <fct>            <dbl>         <dbl> <lgl>
## 1 nonCTCF WAPL_6h  0.00563       0.00563       TRUE 
## 2 nonCTCF WAPL_24h 0.00000217    0.00000434    TRUE 
## 3 CTCF    WAPL_6h  0.0000000983  0.000000295   TRUE 
## 4 CTCF    WAPL_24h 0.00000000192 0.00000000768 TRUE 
## # A tibble: 2 × 5
##   group key       pvalue    padj sign 
##   <chr> <chr>      <dbl>   <dbl> <lgl>
## 1 CTCF  WAPL_6h  0.00292 0.00583 TRUE 
## 2 CTCF  WAPL_24h 0.0111  0.0111  TRUE
tib_stat <- bind_rows(tib_stat, tmp)

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "CTCFWAPL_0h", "CTCFWAPL_6h", 
                                            "CTCFWAPL_24h", "CTCFWAPL_96h"),
                                          ylim = c(-0.95, 0.7))

## # A tibble: 4 × 5
##   group   key           pvalue    padj sign 
##   <fct>   <fct>          <dbl>   <dbl> <lgl>
## 1 nonCTCF CTCFWAPL_6h  0.506   1       FALSE
## 2 nonCTCF CTCFWAPL_24h 0.988   1       FALSE
## 3 CTCF    CTCFWAPL_6h  0.00133 0.00533 TRUE 
## 4 CTCF    CTCFWAPL_24h 0.0228  0.0684  FALSE
## # A tibble: 2 × 5
##   group key          pvalue   padj sign 
##   <chr> <chr>         <dbl>  <dbl> <lgl>
## 1 CTCF  CTCFWAPL_6h  0.0357 0.0714 FALSE
## 2 CTCF  CTCFWAPL_24h 0.0727 0.0727 FALSE
tib_stat <- bind_rows(tib_stat, tmp)

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "RAD21_0h", "RAD21_6h", 
                                            "RAD21_24h"),
                                          ylim = c(-0.95, 0.7))

## # A tibble: 4 × 5
##   group   key          pvalue     padj sign 
##   <fct>   <fct>         <dbl>    <dbl> <lgl>
## 1 nonCTCF RAD21_6h  0.000121  0.000363 TRUE 
## 2 nonCTCF RAD21_24h 0.0000471 0.000188 TRUE 
## 3 CTCF    RAD21_6h  0.00245   0.00489  TRUE 
## 4 CTCF    RAD21_24h 0.300     0.300    FALSE
## # A tibble: 2 × 5
##   group key       pvalue   padj sign 
##   <chr> <chr>      <dbl>  <dbl> <lgl>
## 1 CTCF  RAD21_6h  0.396  0.396  FALSE
## 2 CTCF  RAD21_24h 0.0329 0.0658 FALSE
tib_stat <- bind_rows(tib_stat, tmp)

tib_stat %>% 
  mutate(padj = p.adjust(pvalue),
         sign = padj < 0.05) %>%
  print(n = 40)
## # A tibble: 9 × 5
##   group key             pvalue     padj sign 
##   <chr> <chr>            <dbl>    <dbl> <lgl>
## 1 CTCF  PT_24h       0.0693    0.208    FALSE
## 2 CTCF  CTCFEL_6h    0.000538  0.00431  TRUE 
## 3 CTCF  CTCFEL_24h   0.0000642 0.000578 TRUE 
## 4 CTCF  WAPL_6h      0.00292   0.0204   TRUE 
## 5 CTCF  WAPL_24h     0.0111    0.0668   FALSE
## 6 CTCF  CTCFWAPL_6h  0.0357    0.164    FALSE
## 7 CTCF  CTCFWAPL_24h 0.0727    0.208    FALSE
## 8 CTCF  RAD21_6h     0.396     0.396    FALSE
## 9 CTCF  RAD21_24h    0.0329    0.164    FALSE
# CTCF + orientation vs non-CTCF
tib_stat <- tibble()

PlotBordersWithConfidenceIntervals(tib_damid, 
                                   c("PT_0h"),
                                   group = "border_ctcf_strand",
                                   ylim = c(-0.80, 0.80),
                                   keep_PT = T)

## NULL
tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "PT_24h"),
                                          group = "border_ctcf_strand",
                                          ylim = c(-0.80, 0.80),
                                          keep_PT = T)

## # A tibble: 4 × 5
##   group     key     pvalue   padj sign 
##   <fct>     <fct>    <dbl>  <dbl> <lgl>
## 1 nonCTCF   PT_24h 0.154   0.307  FALSE
## 2 outwards  PT_24h 0.221   0.307  FALSE
## 3 inwards   PT_24h 0.00861 0.0258 TRUE 
## 4 ambiguous PT_24h 0.00284 0.0114 TRUE 
## # A tibble: 3 × 5
##   group     key    pvalue  padj sign 
##   <chr>     <chr>   <dbl> <dbl> <lgl>
## 1 outwards  PT_24h 0.544  0.544 FALSE
## 2 inwards   PT_24h 0.224  0.447 FALSE
## 3 ambiguous PT_24h 0.0920 0.276 FALSE
tib_stat <- bind_rows(tib_stat, tmp)

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "CTCFEL_0h", "CTCFEL_6h", 
                                            "CTCFEL_24h", "CTCFEL_96h"),
                                          group = "border_ctcf_strand",
                                          ylim = c(-0.80, 0.80))

## # A tibble: 8 × 5
##   group     key         pvalue   padj sign 
##   <fct>     <fct>        <dbl>  <dbl> <lgl>
## 1 nonCTCF   CTCFEL_6h  0.0770  0.383  FALSE
## 2 nonCTCF   CTCFEL_24h 0.0266  0.186  FALSE
## 3 outwards  CTCFEL_6h  0.168   0.383  FALSE
## 4 outwards  CTCFEL_24h 0.235   0.383  FALSE
## 5 inwards   CTCFEL_6h  0.104   0.383  FALSE
## 6 inwards   CTCFEL_24h 0.0765  0.383  FALSE
## 7 ambiguous CTCFEL_6h  0.0396  0.238  FALSE
## 8 ambiguous CTCFEL_24h 0.00817 0.0653 FALSE
## # A tibble: 6 × 5
##   group     key         pvalue    padj sign 
##   <chr>     <chr>        <dbl>   <dbl> <lgl>
## 1 outwards  CTCFEL_6h  0.0478  0.0945  FALSE
## 2 outwards  CTCFEL_24h 0.0473  0.0945  FALSE
## 3 inwards   CTCFEL_6h  0.0231  0.0693  FALSE
## 4 inwards   CTCFEL_24h 0.00662 0.0315  TRUE 
## 5 ambiguous CTCFEL_6h  0.00630 0.0315  TRUE 
## 6 ambiguous CTCFEL_24h 0.00108 0.00646 TRUE
tib_stat <- bind_rows(tib_stat, tmp)

PlotBordersWithConfidenceIntervals(tib_damid, 
                                   c("PT_0h", "CTCFNQ_0h", "CTCFNQ_6h", 
                                     "CTCFNQ_24h", "CTCFNQ_96h"),
                                   group = "border_ctcf_strand",
                                   ylim = c(-0.80, 0.80))

## # A tibble: 8 × 5
##   group     key             pvalue       padj sign 
##   <fct>     <fct>            <dbl>      <dbl> <lgl>
## 1 nonCTCF   CTCFNQ_6h  0.00186     0.0130     TRUE 
## 2 nonCTCF   CTCFNQ_24h 0.000000175 0.00000140 TRUE 
## 3 outwards  CTCFNQ_6h  0.0146      0.0691     FALSE
## 4 outwards  CTCFNQ_24h 0.0138      0.0691     FALSE
## 5 inwards   CTCFNQ_6h  0.0152      0.0691     FALSE
## 6 inwards   CTCFNQ_24h 0.0912      0.182      FALSE
## 7 ambiguous CTCFNQ_6h  0.444       0.444      FALSE
## 8 ambiguous CTCFNQ_24h 0.00804     0.0482     TRUE 
## # A tibble: 6 × 5
##   group     key        pvalue  padj sign 
##   <chr>     <chr>       <dbl> <dbl> <lgl>
## 1 outwards  CTCFNQ_6h   0.385     1 FALSE
## 2 outwards  CTCFNQ_24h  0.458     1 FALSE
## 3 inwards   CTCFNQ_6h   0.356     1 FALSE
## 4 inwards   CTCFNQ_24h  0.480     1 FALSE
## 5 ambiguous CTCFNQ_6h   0.284     1 FALSE
## 6 ambiguous CTCFNQ_24h  0.576     1 FALSE
## # A tibble: 6 × 3
##   group     key        pvalue
##   <chr>     <chr>       <dbl>
## 1 outwards  CTCFNQ_6h   0.385
## 2 outwards  CTCFNQ_24h  0.458
## 3 inwards   CTCFNQ_6h   0.356
## 4 inwards   CTCFNQ_24h  0.480
## 5 ambiguous CTCFNQ_6h   0.284
## 6 ambiguous CTCFNQ_24h  0.576
tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "WAPL_0h", "WAPL_6h", 
                                            "WAPL_24h", "WAPL_96h"),
                                          group = "border_ctcf_strand",
                                          ylim = c(-1.05, 0.75))

## # A tibble: 8 × 5
##   group     key          pvalue      padj sign 
##   <fct>     <fct>         <dbl>     <dbl> <lgl>
## 1 nonCTCF   WAPL_6h  0.00563    0.0169    TRUE 
## 2 nonCTCF   WAPL_24h 0.00000217 0.0000152 TRUE 
## 3 outwards  WAPL_6h  0.388      0.388     FALSE
## 4 outwards  WAPL_24h 0.134      0.268     FALSE
## 5 inwards   WAPL_6h  0.0000349  0.000209  TRUE 
## 6 inwards   WAPL_24h 0.00000156 0.0000124 TRUE 
## 7 ambiguous WAPL_6h  0.0000879  0.000439  TRUE 
## 8 ambiguous WAPL_24h 0.000120   0.000479  TRUE 
## # A tibble: 6 × 5
##   group     key        pvalue    padj sign 
##   <chr>     <chr>       <dbl>   <dbl> <lgl>
## 1 outwards  WAPL_6h  0.901    1       FALSE
## 2 outwards  WAPL_24h 0.847    1       FALSE
## 3 inwards   WAPL_6h  0.00512  0.0256  TRUE 
## 4 inwards   WAPL_24h 0.000775 0.00465 TRUE 
## 5 ambiguous WAPL_6h  0.00783  0.0313  TRUE 
## 6 ambiguous WAPL_24h 0.153    0.459   FALSE
tib_stat <- bind_rows(tib_stat, tmp)

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "CTCFWAPL_0h", "CTCFWAPL_6h", 
                                            "CTCFWAPL_24h", "CTCFWAPL_96h"),
                                          group = "border_ctcf_strand",
                                          ylim = c(-1.05, 0.75))

## # A tibble: 8 × 5
##   group     key          pvalue  padj sign 
##   <fct>     <fct>         <dbl> <dbl> <lgl>
## 1 nonCTCF   CTCFWAPL_6h  0.506  1     FALSE
## 2 nonCTCF   CTCFWAPL_24h 0.988  1     FALSE
## 3 outwards  CTCFWAPL_6h  0.0526 0.368 FALSE
## 4 outwards  CTCFWAPL_24h 0.543  1     FALSE
## 5 inwards   CTCFWAPL_6h  0.112  0.560 FALSE
## 6 inwards   CTCFWAPL_24h 0.157  0.627 FALSE
## 7 ambiguous CTCFWAPL_6h  0.0396 0.317 FALSE
## 8 ambiguous CTCFWAPL_24h 0.0639 0.383 FALSE
## # A tibble: 6 × 5
##   group     key          pvalue  padj sign 
##   <chr>     <chr>         <dbl> <dbl> <lgl>
## 1 outwards  CTCFWAPL_6h  0.111  0.555 FALSE
## 2 outwards  CTCFWAPL_24h 0.574  0.621 FALSE
## 3 inwards   CTCFWAPL_6h  0.207  0.621 FALSE
## 4 inwards   CTCFWAPL_24h 0.259  0.621 FALSE
## 5 ambiguous CTCFWAPL_6h  0.133  0.555 FALSE
## 6 ambiguous CTCFWAPL_24h 0.0806 0.484 FALSE
tib_stat <- bind_rows(tib_stat, tmp)

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "RAD21_0h", "RAD21_6h", 
                                            "RAD21_24h"),
                                          group = "border_ctcf_strand",
                                          ylim = c(-1.05, 0.75))

## # A tibble: 8 × 5
##   group     key          pvalue     padj sign 
##   <fct>     <fct>         <dbl>    <dbl> <lgl>
## 1 nonCTCF   RAD21_6h  0.000121  0.000847 TRUE 
## 2 nonCTCF   RAD21_24h 0.0000471 0.000377 TRUE 
## 3 outwards  RAD21_6h  0.113     0.563    FALSE
## 4 outwards  RAD21_24h 0.263     0.789    FALSE
## 5 inwards   RAD21_6h  0.151     0.606    FALSE
## 6 inwards   RAD21_24h 0.705     1        FALSE
## 7 ambiguous RAD21_6h  0.0356    0.214    FALSE
## 8 ambiguous RAD21_24h 0.605     1        FALSE
## # A tibble: 6 × 5
##   group     key       pvalue  padj sign 
##   <chr>     <chr>      <dbl> <dbl> <lgl>
## 1 outwards  RAD21_6h  0.934  1     FALSE
## 2 outwards  RAD21_24h 0.333  1     FALSE
## 3 inwards   RAD21_6h  0.399  1     FALSE
## 4 inwards   RAD21_24h 0.119  0.593 FALSE
## 5 ambiguous RAD21_6h  0.489  1     FALSE
## 6 ambiguous RAD21_24h 0.0868 0.521 FALSE
tib_stat <- bind_rows(tib_stat, tmp)

tib_stat %>% 
  mutate(padj = p.adjust(pvalue),
         sign = padj < 0.05) %>%
  print(n = 40) 
## # A tibble: 27 × 5
##    group     key            pvalue   padj sign 
##    <chr>     <chr>           <dbl>  <dbl> <lgl>
##  1 outwards  PT_24h       0.544    1      FALSE
##  2 inwards   PT_24h       0.224    1      FALSE
##  3 ambiguous PT_24h       0.0920   1      FALSE
##  4 outwards  CTCFEL_6h    0.0478   0.945  FALSE
##  5 outwards  CTCFEL_24h   0.0473   0.945  FALSE
##  6 inwards   CTCFEL_6h    0.0231   0.485  FALSE
##  7 inwards   CTCFEL_24h   0.00662  0.152  FALSE
##  8 ambiguous CTCFEL_6h    0.00630  0.151  FALSE
##  9 ambiguous CTCFEL_24h   0.00108  0.0280 TRUE 
## 10 outwards  WAPL_6h      0.901    1      FALSE
## 11 outwards  WAPL_24h     0.847    1      FALSE
## 12 inwards   WAPL_6h      0.00512  0.128  FALSE
## 13 inwards   WAPL_24h     0.000775 0.0209 TRUE 
## 14 ambiguous WAPL_6h      0.00783  0.172  FALSE
## 15 ambiguous WAPL_24h     0.153    1      FALSE
## 16 outwards  CTCFWAPL_6h  0.111    1      FALSE
## 17 outwards  CTCFWAPL_24h 0.574    1      FALSE
## 18 inwards   CTCFWAPL_6h  0.207    1      FALSE
## 19 inwards   CTCFWAPL_24h 0.259    1      FALSE
## 20 ambiguous CTCFWAPL_6h  0.133    1      FALSE
## 21 ambiguous CTCFWAPL_24h 0.0806   1      FALSE
## 22 outwards  RAD21_6h     0.934    1      FALSE
## 23 outwards  RAD21_24h    0.333    1      FALSE
## 24 inwards   RAD21_6h     0.399    1      FALSE
## 25 inwards   RAD21_24h    0.119    1      FALSE
## 26 ambiguous RAD21_6h     0.489    1      FALSE
## 27 ambiguous RAD21_24h    0.0868   1      FALSE

As mentioned, also plot borders with multiple CTCF sites.

# CTCF + number vs non-CTCF
tib_stat <- tibble()

PlotBordersWithConfidenceIntervals(tib_damid, 
                                   c("PT_0h"),
                                   group = "border_ctcf_n",
                                   ylim = c(-1.25, 0.90),
                                   keep_PT = T)

## NULL
tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "PT_24h"),
                                          group = "border_ctcf_n",
                                          ylim = c(-1.25, 0.90),
                                          keep_PT = T)

## # A tibble: 4 × 5
##   group   key      pvalue    padj sign 
##   <fct>   <fct>     <dbl>   <dbl> <lgl>
## 1 nonCTCF PT_24h 0.154    0.307   FALSE
## 2 1       PT_24h 0.000641 0.00256 TRUE 
## 3 2       PT_24h 0.0386   0.116   FALSE
## 4 >=3     PT_24h 0.625    0.625   FALSE
## # A tibble: 3 × 5
##   group key    pvalue  padj sign 
##   <chr> <chr>   <dbl> <dbl> <lgl>
## 1 1     PT_24h  0.119 0.358 FALSE
## 2 2     PT_24h  0.174 0.358 FALSE
## 3 >=3   PT_24h  0.807 0.807 FALSE
tib_stat <- bind_rows(tib_stat, tmp)

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "CTCFEL_0h", "CTCFEL_6h", 
                                            "CTCFEL_24h", "CTCFEL_96h"),
                                          group = "border_ctcf_n",
                                          ylim = c(-1.25, 0.90))

## # A tibble: 8 × 5
##   group   key           pvalue     padj sign 
##   <fct>   <fct>          <dbl>    <dbl> <lgl>
## 1 nonCTCF CTCFEL_6h  0.0770    0.312    FALSE
## 2 nonCTCF CTCFEL_24h 0.0266    0.160    FALSE
## 3 1       CTCFEL_6h  0.137     0.375    FALSE
## 4 1       CTCFEL_24h 0.185     0.375    FALSE
## 5 2       CTCFEL_6h  0.00299   0.0209   TRUE 
## 6 2       CTCFEL_24h 0.0000470 0.000376 TRUE 
## 7 >=3     CTCFEL_6h  0.125     0.375    FALSE
## 8 >=3     CTCFEL_24h 0.0625    0.312    FALSE
## # A tibble: 6 × 5
##   group key            pvalue      padj sign 
##   <chr> <chr>           <dbl>     <dbl> <lgl>
## 1 1     CTCFEL_6h  0.0245     0.0515    FALSE
## 2 1     CTCFEL_24h 0.0172     0.0515    FALSE
## 3 2     CTCFEL_6h  0.000208   0.00104   TRUE 
## 4 2     CTCFEL_24h 0.00000271 0.0000162 TRUE 
## 5 >=3   CTCFEL_6h  0.0328     0.0515    FALSE
## 6 >=3   CTCFEL_24h 0.00564    0.0226    TRUE
tib_stat <- bind_rows(tib_stat, tmp)

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "WAPL_0h", "WAPL_6h", 
                                            "WAPL_24h", "WAPL_96h"),
                                          group = "border_ctcf_n",
                                          ylim = c(-1.25, 0.90))

## # A tibble: 8 × 5
##   group   key           pvalue       padj sign 
##   <fct>   <fct>          <dbl>      <dbl> <lgl>
## 1 nonCTCF WAPL_6h  0.00563     0.0225     TRUE 
## 2 nonCTCF WAPL_24h 0.00000217  0.0000152  TRUE 
## 3 1       WAPL_6h  0.00000926  0.0000556  TRUE 
## 4 1       WAPL_24h 0.000000743 0.00000594 TRUE 
## 5 2       WAPL_6h  0.0129      0.0388     TRUE 
## 6 2       WAPL_24h 0.000607    0.00304    TRUE 
## 7 >=3     WAPL_6h  0.0625      0.125      FALSE
## 8 >=3     WAPL_24h 0.188       0.188      FALSE
## # A tibble: 6 × 5
##   group key      pvalue   padj sign 
##   <chr> <chr>     <dbl>  <dbl> <lgl>
## 1 1     WAPL_6h  0.0148 0.0886 FALSE
## 2 1     WAPL_24h 0.0541 0.162  FALSE
## 3 2     WAPL_6h  0.0740 0.162  FALSE
## 4 2     WAPL_24h 0.0215 0.107  FALSE
## 5 >=3   WAPL_6h  0.0253 0.107  FALSE
## 6 >=3   WAPL_24h 0.369  0.369  FALSE
tib_stat <- bind_rows(tib_stat, tmp)

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "CTCFWAPL_0h", "CTCFWAPL_6h", 
                                            "CTCFWAPL_24h", "CTCFWAPL_96h"),
                                          group = "border_ctcf_n",
                                          ylim = c(-1.25, 0.90))

## # A tibble: 8 × 5
##   group   key           pvalue   padj sign 
##   <fct>   <fct>          <dbl>  <dbl> <lgl>
## 1 nonCTCF CTCFWAPL_6h  0.506   1      FALSE
## 2 nonCTCF CTCFWAPL_24h 0.988   1      FALSE
## 3 1       CTCFWAPL_6h  0.0585  0.351  FALSE
## 4 1       CTCFWAPL_24h 0.256   1      FALSE
## 5 2       CTCFWAPL_6h  0.00479 0.0383 TRUE 
## 6 2       CTCFWAPL_24h 0.0179  0.125  FALSE
## 7 >=3     CTCFWAPL_6h  0.188   0.938  FALSE
## 8 >=3     CTCFWAPL_24h 0.625   1      FALSE
## # A tibble: 6 × 5
##   group key           pvalue   padj sign 
##   <chr> <chr>          <dbl>  <dbl> <lgl>
## 1 1     CTCFWAPL_6h  0.282   0.903  FALSE
## 2 1     CTCFWAPL_24h 0.336   0.903  FALSE
## 3 2     CTCFWAPL_6h  0.00274 0.0164 TRUE 
## 4 2     CTCFWAPL_24h 0.0105  0.0523 FALSE
## 5 >=3   CTCFWAPL_6h  0.226   0.903  FALSE
## 6 >=3   CTCFWAPL_24h 0.443   0.903  FALSE
tib_stat <- bind_rows(tib_stat, tmp)

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "RAD21_0h", "RAD21_6h", 
                                            "RAD21_24h"),
                                          group = "border_ctcf_n",
                                          ylim = c(-1.25, 0.90))

## # A tibble: 8 × 5
##   group   key          pvalue     padj sign 
##   <fct>   <fct>         <dbl>    <dbl> <lgl>
## 1 nonCTCF RAD21_6h  0.000121  0.000847 TRUE 
## 2 nonCTCF RAD21_24h 0.0000471 0.000377 TRUE 
## 3 1       RAD21_6h  0.00132   0.00792  TRUE 
## 4 1       RAD21_24h 0.171     0.856    FALSE
## 5 2       RAD21_6h  0.670     1        FALSE
## 6 2       RAD21_24h 0.815     1        FALSE
## 7 >=3     RAD21_6h  0.812     1        FALSE
## 8 >=3     RAD21_24h 1         1        FALSE
## # A tibble: 6 × 5
##   group key       pvalue  padj sign 
##   <chr> <chr>      <dbl> <dbl> <lgl>
## 1 1     RAD21_6h  0.569  1     FALSE
## 2 1     RAD21_24h 0.0781 0.469 FALSE
## 3 2     RAD21_6h  0.341  1     FALSE
## 4 2     RAD21_24h 0.117  0.584 FALSE
## 5 >=3   RAD21_6h  0.763  1     FALSE
## 6 >=3   RAD21_24h 0.434  1     FALSE
tib_stat <- bind_rows(tib_stat, tmp)

tib_stat %>% 
  mutate(padj = p.adjust(pvalue),
         sign = padj < 0.05) %>%
  print(n = 40)
## # A tibble: 27 × 5
##    group key              pvalue      padj sign 
##    <chr> <chr>             <dbl>     <dbl> <lgl>
##  1 1     PT_24h       0.119      1         FALSE
##  2 2     PT_24h       0.174      1         FALSE
##  3 >=3   PT_24h       0.807      1         FALSE
##  4 1     CTCFEL_6h    0.0245     0.466     FALSE
##  5 1     CTCFEL_24h   0.0172     0.360     FALSE
##  6 2     CTCFEL_6h    0.000208   0.00541   TRUE 
##  7 2     CTCFEL_24h   0.00000271 0.0000730 TRUE 
##  8 >=3   CTCFEL_6h    0.0328     0.557     FALSE
##  9 >=3   CTCFEL_24h   0.00564    0.135     FALSE
## 10 1     WAPL_6h      0.0148     0.325     FALSE
## 11 1     WAPL_24h     0.0541     0.865     FALSE
## 12 2     WAPL_6h      0.0740     1         FALSE
## 13 2     WAPL_24h     0.0215     0.430     FALSE
## 14 >=3   WAPL_6h      0.0253     0.466     FALSE
## 15 >=3   WAPL_24h     0.369      1         FALSE
## 16 1     CTCFWAPL_6h  0.282      1         FALSE
## 17 1     CTCFWAPL_24h 0.336      1         FALSE
## 18 2     CTCFWAPL_6h  0.00274    0.0685    FALSE
## 19 2     CTCFWAPL_24h 0.0105     0.241     FALSE
## 20 >=3   CTCFWAPL_6h  0.226      1         FALSE
## 21 >=3   CTCFWAPL_24h 0.443      1         FALSE
## 22 1     RAD21_6h     0.569      1         FALSE
## 23 1     RAD21_24h    0.0781     1         FALSE
## 24 2     RAD21_6h     0.341      1         FALSE
## 25 2     RAD21_24h    0.117      1         FALSE
## 26 >=3   RAD21_6h     0.763      1         FALSE
## 27 >=3   RAD21_24h    0.434      1         FALSE

Good.

4. Save data

All relevant data is saved from the matching document looking at borders without
active genes.

# 
# saveRDS(metadata.damid, 
#         file.path(output.dir, "metadata_damid.rds"))
# saveRDS(damid, 
#         file.path(output.dir, "damid.rds"))
# saveRDS(bin.size, 
#         file.path(output.dir, "bin_size.rds"))
# 
# # Also, as tsv file
# tib_damid <- as_tibble(damid) %>%
#   dplyr::select(-width, -strand) %>%
#   dplyr::select(-contains("CTCF_"), -contains("CTCFNQ_"))
# print(names(tib_damid))
# 
# tib_damid <- tib_damid  %>%
#   rename_all(function(x) str_replace(x, "CTCFEL", "CTCF")) 
# print(names(tib_damid))
# 
# write_tsv(tib_damid,
#           file.path(output.dir, "damid_average_replicates.tsv"))
# 
# # Also, for replicates individually
# damid_replicates <- as_tibble(damid_replicates) %>%
#   dplyr::select(-width, -strand, 
#                 -contains("NQ"))
# print(names(damid_replicates))
# 
# damid_replicates <- damid_replicates %>%
#   rename_all(~ c("seqnames", "start", "end",
#                  "PT_0h_r1", "PT_0h_r2",
#                  "PT_24h_r1", "PT_24h_r2",
#                  "CTCF_0h_r1", "CTCF_0h_r2",
#                  "CTCF_6h_r1", "CTCF_6h_r2",
#                  "CTCF_24h_r1", "CTCF_24h_r2",
#                  "CTCF_96h_r1", "CTCF_96h_r2",
#                  "WAPL_0h_r1", "WAPL_0h_r2", "WAPL_0h_r3",
#                  "WAPL_6h_r1", "WAPL_6h_r2", "WAPL_6h_r3",
#                  "WAPL_24h_r1", "WAPL_24h_r2", "WAPL_24h_r3",
#                  "WAPL_96h_r1", "WAPL_96h_r2", "WAPL_96h_r3",
#                  "CTCFWAPL_0h_r1", "CTCFWAPL_0h_r2", 
#                  "CTCFWAPL_0h_r3", "CTCFWAPL_0h_r4",
#                  "CTCFWAPL_6h_r1", "CTCFWAPL_6h_r2", 
#                  "CTCFWAPL_6h_r3", "CTCFWAPL_6h_r4",
#                  "CTCFWAPL_24h_r1", 
#                  "CTCFWAPL_24h_r3", "CTCFWAPL_24h_r4",
#                  "CTCFWAPL_96h_r1", "CTCFWAPL_96h_r2", 
#                  "CTCFWAPL_96h_r3",
#                  "RAD21_0h_r1", "RAD21_0h_r2",
#                  "RAD21_6h_r1", "RAD21_6h_r2",
#                  "RAD21_24h_r1", "RAD21_24h_r2"))
# print(names(damid_replicates))
# 
# write_tsv(damid_replicates,
#           file.path(output.dir, "damid_individual_replicates.tsv"))

Conclusion

We generally observe the same effects at border with active genes. The effect size seems a bit smaller, but it’s difficult to tell.

SessionInfo

sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] knitr_1.33           corrr_0.4.3          broom_0.7.9         
##  [4] ggbeeswarm_0.6.0     M3C_1.12.0           yaml_2.2.1          
##  [7] caTools_1.18.2       rtracklayer_1.50.0   GenomicRanges_1.42.0
## [10] GenomeInfoDb_1.26.7  IRanges_2.24.1       S4Vectors_0.28.1    
## [13] BiocGenerics_0.36.1  forcats_0.5.1        stringr_1.4.0       
## [16] dplyr_1.0.7          purrr_0.3.4          readr_2.0.0         
## [19] tidyr_1.1.3          tibble_3.1.6         ggplot2_3.3.5       
## [22] tidyverse_1.3.1     
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7                matrixStats_0.60.0         
##  [3] fs_1.5.0                    lubridate_1.7.10           
##  [5] RColorBrewer_1.1-2          doParallel_1.0.16          
##  [7] httr_1.4.2                  tools_4.0.5                
##  [9] backports_1.2.1             bslib_0.2.5.1              
## [11] utf8_1.2.2                  R6_2.5.1                   
## [13] vipor_0.4.5                 DBI_1.1.1                  
## [15] colorspace_2.0-2            withr_2.4.2                
## [17] tidyselect_1.1.1            compiler_4.0.5             
## [19] cli_3.1.0                   rvest_1.0.1                
## [21] Biobase_2.50.0              xml2_1.3.2                 
## [23] DelayedArray_0.16.3         labeling_0.4.2             
## [25] sass_0.4.0                  scales_1.1.1               
## [27] askpass_1.1                 digest_0.6.28              
## [29] Rsamtools_2.6.0             rmarkdown_2.11             
## [31] XVector_0.30.0              pkgconfig_2.0.3            
## [33] htmltools_0.5.1.1           umap_0.2.7.0               
## [35] MatrixGenerics_1.2.1        highr_0.9                  
## [37] dbplyr_2.1.1                rlang_0.4.12               
## [39] readxl_1.3.1                rstudioapi_0.13            
## [41] farver_2.1.0                jquerylib_0.1.4            
## [43] generics_0.1.0              jsonlite_1.7.2             
## [45] BiocParallel_1.24.1         RCurl_1.98-1.3             
## [47] magrittr_2.0.1              GenomeInfoDbData_1.2.4     
## [49] Matrix_1.3-4                Rcpp_1.0.7                 
## [51] munsell_0.5.0               fansi_0.5.0                
## [53] reticulate_1.20             lifecycle_1.0.1            
## [55] stringi_1.7.3               SummarizedExperiment_1.20.0
## [57] zlibbioc_1.36.0             Rtsne_0.15                 
## [59] matrixcalc_1.0-5            grid_4.0.5                 
## [61] doSNOW_1.0.19               crayon_1.4.2               
## [63] lattice_0.20-44             Biostrings_2.58.0          
## [65] haven_2.4.1                 hms_1.1.0                  
## [67] pillar_1.6.4                corpcor_1.6.9              
## [69] codetools_0.2-18            reprex_2.0.0               
## [71] XML_3.99-0.6                glue_1.5.0                 
## [73] evaluate_0.14               modelr_0.1.8               
## [75] png_0.1-7                   vctrs_0.3.8                
## [77] tzdb_0.1.2                  foreach_1.5.1              
## [79] cellranger_1.1.0            openssl_1.4.4              
## [81] gtable_0.3.0                assertthat_0.2.1           
## [83] xfun_0.24                   RSpectra_0.16-0            
## [85] snow_0.4-3                  iterators_1.0.13           
## [87] beeswarm_0.4.0              GenomicAlignments_1.26.0   
## [89] cluster_2.1.2               ellipsis_0.3.2